Data Import

In [1]:
# library(tidyverse) # metapackage with lots of helpful functions
library(knitr)
library(repr)

bike_data <- read.csv("metro-bike-share-trip-data.csv")  # read csv file 
# data 
# help(read.csv)
In [2]:
head(bike_data)
Trip.IDDurationStart.TimeEnd.TimeStarting.Station.IDStarting.Station.LatitudeStarting.Station.LongitudeEnding.Station.IDEnding.Station.LatitudeEnding.Station.LongitudeBike.IDPlan.DurationTrip.Route.CategoryPassholder.TypeStarting.Lat.LongEnding.Lat.Long
1912818 180 2016-07-07T04:17:00 2016-07-07T04:20:00 3014 34.05661 -118.2372 3014 34.05661 -118.2372 6281 30 Round Trip Monthly Pass {'longitude': '-118.23721', 'latitude': '34.0566101', 'needs_recoding': False}{'longitude': '-118.23721', 'latitude': '34.0566101', 'needs_recoding': False}
1919661 1980 2016-07-07T06:00:00 2016-07-07T06:33:00 3014 34.05661 -118.2372 3014 34.05661 -118.2372 6281 30 Round Trip Monthly Pass {'longitude': '-118.23721', 'latitude': '34.0566101', 'needs_recoding': False}{'longitude': '-118.23721', 'latitude': '34.0566101', 'needs_recoding': False}
1933383 300 2016-07-07T10:32:00 2016-07-07T10:37:00 3016 34.05290 -118.2416 3016 34.05290 -118.2416 5861 365 Round Trip Flex Pass {'longitude': '-118.24156', 'latitude': '34.0528984', 'needs_recoding': False}{'longitude': '-118.24156', 'latitude': '34.0528984', 'needs_recoding': False}
1944197 10860 2016-07-07T10:37:00 2016-07-07T13:38:00 3016 34.05290 -118.2416 3016 34.05290 -118.2416 5861 365 Round Trip Flex Pass {'longitude': '-118.24156', 'latitude': '34.0528984', 'needs_recoding': False}{'longitude': '-118.24156', 'latitude': '34.0528984', 'needs_recoding': False}
1940317 420 2016-07-07T12:51:00 2016-07-07T12:58:00 3032 34.04989 -118.2559 3032 34.04989 -118.2559 6674 0 Round Trip Walk-up {'longitude': '-118.25588', 'latitude': '34.0498886', 'needs_recoding': False}{'longitude': '-118.25588', 'latitude': '34.0498886', 'needs_recoding': False}
1944075 780 2016-07-07T12:51:00 2016-07-07T13:04:00 3021 34.04561 -118.2370 3054 34.03922 -118.2365 6717 30 One Way Monthly Pass {'longitude': '-118.23703', 'latitude': '34.0456085', 'needs_recoding': False}{'longitude': '-118.23649', 'latitude': '34.0392189', 'needs_recoding': False}
In [3]:
typeof(bike_data)
'list'

List of columns' names

In [4]:
cols <- colnames(bike_data)
print(cols)
 [1] "Trip.ID"                    "Duration"                  
 [3] "Start.Time"                 "End.Time"                  
 [5] "Starting.Station.ID"        "Starting.Station.Latitude" 
 [7] "Starting.Station.Longitude" "Ending.Station.ID"         
 [9] "Ending.Station.Latitude"    "Ending.Station.Longitude"  
[11] "Bike.ID"                    "Plan.Duration"             
[13] "Trip.Route.Category"        "Passholder.Type"           
[15] "Starting.Lat.Long"          "Ending.Lat.Long"           

produce a duration column (by minute)

In [5]:
bike_data$Duration_Mins = bike_data$Duration/60
In [6]:
cols <- colnames(bike_data)
print(cols)
 [1] "Trip.ID"                    "Duration"                  
 [3] "Start.Time"                 "End.Time"                  
 [5] "Starting.Station.ID"        "Starting.Station.Latitude" 
 [7] "Starting.Station.Longitude" "Ending.Station.ID"         
 [9] "Ending.Station.Latitude"    "Ending.Station.Longitude"  
[11] "Bike.ID"                    "Plan.Duration"             
[13] "Trip.Route.Category"        "Passholder.Type"           
[15] "Starting.Lat.Long"          "Ending.Lat.Long"           
[17] "Duration_Mins"             

import plot graph library

In [7]:
library(ggplot2)

Duration Mins

In [8]:
ggplot(data = bike_data, aes( x = Duration_Mins) ) +
    geom_bar(fill = "lightblue", colour = "black")

Route Category

In [9]:
ggplot(data = bike_data, aes( x = Trip.Route.Category) ) +
    geom_bar(fill = "lightblue", colour = "black")

Passholder Type

In [10]:
ggplot(data = bike_data, aes( x = Passholder.Type) ) +
    geom_bar(fill = "lightblue", colour = "black")

library(devtools) install_github("dkahle/ggmap")

Import Library for plot data on map

In [11]:
library(ggmap)
library(mapproj)
library(dplyr)
library(forcats)
Google Maps API Terms of Service: http://developers.google.com/maps/terms.
Please cite ggmap if you use it: see citation("ggmap") for details.
Loading required package: maps

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

In [12]:
register_google(key = "AIzaSyBiC7HYahM49Hh4_tvdnKQtjh7Ho6bGdew", day_limit = 1000)
ggmap_credentials()
Google - 
   key : AIzaSyBiC7HYahM49Hh4_tvdnKQtjh7Ho6bGdew 
   account_type : standard 
   day_limit : 1000 
   second_limit : 50 
   client :  
   signature :  
In [13]:
map <- get_map(location = 'Los Angeles', zoom = 14)
# ggmap(map)
Source : https://maps.googleapis.com/maps/api/staticmap?center=Los+Angeles&zoom=14&size=640x640&scale=2&maptype=terrain&language=en-EN&key=AIzaSyBiC7HYahM49Hh4_tvdnKQtjh7Ho6bGdew
Source : https://maps.googleapis.com/maps/api/geocode/json?address=Los%20Angeles&key=AIzaSyBiC7HYahM49Hh4_tvdnKQtjh7Ho6bGdew
In [14]:
? geom_point

Start point and end point of route

In [15]:
p <- ggmap(map)
p + geom_point(data=bike_data, aes(x=Starting.Station.Longitude, y=Starting.Station.Latitude), color="red", size=0.5, alpha=0.7) 
p + geom_point(data=bike_data, aes(x=Ending.Station.Longitude, y=Ending.Station.Latitude), color="yellow", size=0.5, alpha=0.7)
Warning message:
“Removed 514 rows containing missing values (geom_point).”
Warning message:
“Removed 1708 rows containing missing values (geom_point).”
In [16]:
p <- ggmap(map)
p + geom_density2d(data = bike_data, aes(x = Starting.Station.Longitude, y = Starting.Station.Latitude))
#     stat_density2d(data = W, aes(x = lon, y = lat), size = 0.01, bins = 16, geom = 'polygon') +
    scale_fill_gradient(low = "green", high = "red") +
    scale_alpha(range = c(0.00, 0.25), guide = FALSE) +
    theme(legend.position = "none", axis.title = element_blank(), text = element_text(size = 12))
Warning message:
“Removed 514 rows containing non-finite values (stat_density2d).”
Error: Cannot add ggproto objects together. Did you forget to add this object to a ggplot object?
Traceback:

1. `+.gg`(scale_fill_gradient(low = "green", high = "red"), scale_alpha(range = c(0, 
 .     0.25), guide = FALSE))
2. stop("Cannot add ggproto objects together.", " Did you forget to add this object to a ggplot object?", 
 .     call. = FALSE)
In [ ]:
(starting_point[1])
In [ ]:
for (col in cols){
    print(col)
    ggplot(data = bike_data, aes( x = bike_data)) +
      geom_bar(fill = "lightblue", colour = "black")
}